Libraries I may use:
library(tidyverse)
# install for visualizations
library(ggplot2)
# install to combine date and time
library(lubridate)
# for melting a df
library(reshape)
Reading in the first dataset, perceived health status.
# Load the three perceived-health-status extracts from the data directory.
perceived_health_status <- read_csv(
  file = "../data/perceived_health_status.csv"
)
dnmk <- read_csv(
  file = "../data/denmark_only_phs.csv"
)
perceived_health_status_once <- read_csv(
  file = "../data/perceived_health_status_once.csv"
)
Inspecting the Denmark-only data:
# Show only the rows of the Denmark extract whose Sex value is "Total".
filter(dnmk, Sex == "Total")
It appears that then
Inspecting the perceived health data:
# Show the Denmark rows present in the main perceived-health-status data.
filter(perceived_health_status, `Reference area` == "Denmark")
Inspect original perceived health data:
# Show the Denmark rows present in the `_once` extract.
filter(perceived_health_status_once, `Reference area` == "Denmark")
Filtering.
# Narrow the data to a single reference slice: year 2022, REF_AREA "AUT",
# the "Total" sex category and the "15 years or over" age category.
# All conditions must hold at once, so they are passed to one filter() call.
perceived_health_status_stripped <- filter(
  perceived_health_status,
  TIME_PERIOD == 2022,
  REF_AREA == "AUT",
  Sex == "Total",
  Age == "15 years or over"
)

perceived_health_status_stripped
NA
Since the number of observations appears to be considerably larger from
2007 onward, I will limit my data to 2007 and later. However, because the
number of observations drops off in 2024, I will limit my data to the
range 2007-2023. I also want to capture all genders and ages.
# Limit the perceived-health-status data to the years 2007-2023 and to the
# "Total" sex category; the age filter is intentionally left disabled so that
# every age group is kept.
#
# Membership in the year range must be tested with `%in%`. Using `==` against
# the 17-element vector 2007:2023 compares row i with the i-th (recycled) year,
# so a row is kept only when its year happens to line up with its position,
# which silently discards most of the in-range rows.
phs <- perceived_health_status %>%
  filter(TIME_PERIOD %in% 2007:2023) %>%
  filter(Sex == "Total") # %>%
# filter(Age == "15 years or over")

phs
NA
It turns out that Denmark’s data comes only from a population of
15-24 year olds, which would be excluded when I filter for the
collective (Total) age range.
# Denmark rows of the `_once` extract for 2007-2023 in the "Total" sex category.
#
# `%in%` tests each row's year for membership in the range. The previous
# `== c(2007:2023)` recycled the year vector across rows and therefore kept a
# row only when its year matched the year at the same recycled position.
perceived_health_status_once %>%
  filter(TIME_PERIOD %in% 2007:2023) %>%
  filter(Sex == "Total") %>%
  filter(`Reference area` == "Denmark")
NA
Inspect the dataset for the number of years it covers.
# Bar chart of how many rows the full data set holds for each TIME_PERIOD.
barplot(
  height = table(perceived_health_status$TIME_PERIOD),
  main = "number of observations of year in the data"
)

NA
NA
Inspecting the data for balance in the health status column.
# Number of rows per HEALTH_STATUS value, to check how balanced the column is.
count(phs, HEALTH_STATUS)
NA
Which countries are most heavily represented in the data? Denmark was
selected to be included, so I will download a Denmark-only dataset and
investigate why it no longer appears in this data.
# Number of rows per `Reference area` value.
count(phs, `Reference area`)
NA
# Number of rows per AGE value.
count(phs, AGE)
NA
# Earlier candidate inputs, kept for reference but not loaded:
# education_level <- read_csv("../data/educational_attainment_distribution_age_gender.csv")
# education_levels_defined <- read_csv("../data/educational_attainment_distribution.csv")

# Educational-attainment data used in the chunks below.
education_levels_three <- read_csv(
  file = "../data/educational_attainment.csv"
)
# Legend for the attainment codes and their labels in this data set:
# ISCED11A_5T8 = Tertiary education
# ISCED11A_3_4 = Upper secondary or post-secondary non-tertiary education
# ISCED11A_0T2 = Below upper secondary education
# Display the loaded educational-attainment table for inspection.
education_levels_three
NA
Verify that there are only three categories in both the ATTAINMENT_LEV
and `Educational attainment level` columns.
# Distinct attainment codes, sorted.
education_levels_three %>%
  pull(ATTAINMENT_LEV) %>%
  unique() %>%
  sort()
[1] "ISCED11A_0T2" "ISCED11A_3_4" "ISCED11A_5T8"
# Distinct attainment labels, sorted.
education_levels_three %>%
  pull(`Educational attainment level`) %>%
  unique() %>%
  sort()
[1] "Below upper secondary education" "Tertiary education"
[3] "Upper secondary or post-secondary non-tertiary education"
# Distinct STATISTICAL_OPERATION values, sorted.
education_levels_three %>%
  pull(STATISTICAL_OPERATION) %>%
  unique() %>%
  sort()
[1] "OBS" "SE"
I want to use the observed values and not the standard error values
at this time.
# phs_obs <- phs %>%
# filter()
# One reference slice of the educational-attainment data: the "Total" sex
# category, ages 25-64, year 2010, OBS_STATUS "A", REF_AREA "AUT", keeping only
# the observed values ("OBS") rather than the standard errors ("SE").
el_third <- education_levels_three %>%
  filter(
    Sex == "Total",
    Age == "From 25 to 64 years",
    TIME_PERIOD == 2010,
    OBS_STATUS == "A",
    REF_AREA == "AUT",
    STATISTICAL_OPERATION == "OBS"
  )

el_third
NA
# `education_levels_defined` was used here without ever being created: its
# read_csv() call is commented out earlier in the notebook, so this chunk
# stopped with "object 'education_levels_defined' not found". Load it here so
# the chunk can run on its own.
# NOTE(review): the path is taken from that commented-out call; confirm the
# file is still present in ../data.
education_levels_defined <- read_csv("../data/educational_attainment_distribution.csv")

# Same reference slice as used for the three-level data: the "Total" sex
# category, ages 25-64, year 2010, OBS_STATUS "A", REF_AREA "AUT".
el_secondary <- education_levels_defined %>%
  filter(Sex == "Total") %>%
  filter(Age == "From 25 to 64 years") %>%
  filter(TIME_PERIOD == 2010) %>%
  filter(OBS_STATUS == "A") %>%
  filter(REF_AREA == "AUT")

el_secondary
NA
# sort(unique(el_once$OBS_VALUE))
# Load the current-wellbeing data set.
wellbeing_social <- read_csv(
  file = "../data/current_wellbeing_exp.csv"
)
# sort(unique(el_once$`Educational attainment level`))
# sort(unique(el_secondary$`Educational attainment level`))
# el_once %>%
# group_by(`Educational attainment level`) %>%
# summarize(n = n())
# barplot(table(el_once$`Educational attainment level`), main = "number of observations of that education level in the data")
# Load the regional safety data set.
safety_regions <- read_csv(
  file = "../data/safety_regions.csv"
)
# Inspect the safety data for year 2010 and REF_AREA "AUT".
# The sex and age filters below are kept disabled.
safety_regions %>%
  filter(TIME_PERIOD == 2010, REF_AREA == "AUT") # %>%
# filter(Sex == "Total") %>%
# filter(Age == "15 years or over")
# Inspect the wellbeing data for year 2022 and REF_AREA "AUT".
# The unit filter below is kept disabled.
wellbeing_social %>%
  filter(TIME_PERIOD == 2022, REF_AREA == "AUT") # %>%
# filter(Unit)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KTGlicmFyaWVzIEkgbWF5IHVzZSBjYWxsZWQNCmBgYHtyfQ0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCiMgaW5zdGFsbCBmb3IgdmlzdWFsaXphdGlvbnMgDQpsaWJyYXJ5KGdncGxvdDIpDQojIGluc3RhbGwgdG8gY29tYmluZSBkYXRlIGFuZCB0aW1lDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCiMgZm9yIG1lbHRpbmcgYSBkZg0KbGlicmFyeShyZXNoYXBlKQ0KDQpgYGANClJlYWRpbmcgaW4gdGhlIGZpcnN0IGRhdGFzZXQsIHBlcmNlaXZlZCBoZWFsdGggc3RhdHVzLg0KIA0KYGBge3J9DQpwZXJjZWl2ZWRfaGVhbHRoX3N0YXR1cyA8LSByZWFkX2NzdigiLi4vZGF0YS9wZXJjZWl2ZWRfaGVhbHRoX3N0YXR1cy5jc3YiKQ0KZG5tayA8LSByZWFkX2NzdigiLi4vZGF0YS9kZW5tYXJrX29ubHlfcGhzLmNzdiIpDQpwZXJjZWl2ZWRfaGVhbHRoX3N0YXR1c19vbmNlIDwtIHJlYWRfY3N2KCIuLi9kYXRhL3BlcmNlaXZlZF9oZWFsdGhfc3RhdHVzX29uY2UuY3N2IikNCg0KYGBgDQpJbnNwZWN0aW5nIHRoZSBkZW5tYXJrIG9ubHkgZGF0YToNCg0KYGBge3J9DQpkbm1rICU+JSANCiAgZmlsdGVyKFNleCA9PSAiVG90YWwiKQ0KYGBgDQoNCkl0IGFwcGVhcnMgdGhhdCB0aGVuIA0KDQoNCkluc3BlY3RpbmcgdGhlIHBlcmNlaXZlZCBoZWFsdGggZGF0YToNCg0KYGBge3J9DQpwZXJjZWl2ZWRfaGVhbHRoX3N0YXR1cyAlPiUgDQogIGZpbHRlcihgUmVmZXJlbmNlIGFyZWFgID09ICJEZW5tYXJrIikNCmBgYA0KDQpJbnNwZWN0IG9yaWdpbmFsIHBlcmNlaXZlZCBoZWFsdGggZGF0YToNCmBgYHtyfQ0KcGVyY2VpdmVkX2hlYWx0aF9zdGF0dXNfb25jZSU+JSANCiAgZmlsdGVyKGBSZWZlcmVuY2UgYXJlYWAgPT0gIkRlbm1hcmsiKQ0KYGBgDQoNCg0KRmlsdGVyaW5nLg0KDQpgYGB7cn0NCg0KcGVyY2VpdmVkX2hlYWx0aF9zdGF0dXNfc3RyaXBwZWQgPC0gcGVyY2VpdmVkX2hlYWx0aF9zdGF0dXMgJT4lIA0KICBmaWx0ZXIoVElNRV9QRVJJT0QgPT0gMjAyMikgJT4lIA0KICBmaWx0ZXIoUkVGX0FSRUEgPT0gIkFVVCIpICU+JSANCiAgZmlsdGVyKFNleCA9PSAiVG90YWwiKSAlPiUgDQogIGZpbHRlcihBZ2UgPT0gIjE1IHllYXJzIG9yIG92ZXIiKSAgDQoNCnBlcmNlaXZlZF9oZWFsdGhfc3RhdHVzX3N0cmlwcGVkDQoNCmBgYA0KQXMgaXQgYXBwZWFycyB0aGF0IGFmdGVyIDIwMDcsIHRoZSBudW1iZXIgb2Ygb2JzZXJ2YXRpb25zIGFyZSBtb3JlIHNpZ25pZmljYW50IGluIG51bWJlciwgSSB3aWxsIGxpbWl0IG15IGRhdGEgdG8gMjAwNyBhbmQgbGF0ZXIuIEJ1dCwgc2luY2UgaXQgYXBwZWFycyB0aGUgbnVtYmVyIG9mIG9ic2VydmF0aW9ucyBkcm9wcyBvZmYgaW4gMjAyNCwgSSB3aWxsIGxpbWl0IG15IGRhdGEgdG8gYSByYW5nZSBvZiAyMDA3LTIwMjMuIEFzIHdlbGwsIEkgd2FudCB0byBjYXB0dXJlIGFsbCBnZW5kZXJzIGFuZCBhZ2VzLiANCg0KYGBge3J9
DQojIGlucHV0IGNvZGUgdG8gbGltaXQgeWVhciByYW5nZSwgYWdlIHJhbmdlLCBhbmQgZ2VuZGVyIGluIHBlcmNlaXZlZCBoZWFsdGggc3RhdHVzIGRhdGFzZXQNCnBocyA8LSBwZXJjZWl2ZWRfaGVhbHRoX3N0YXR1cyAlPiUgDQogIGZpbHRlcihUSU1FX1BFUklPRCA9PSBjKDIwMDc6MjAyMykpICU+JSANCiAgZmlsdGVyKFNleCA9PSAiVG90YWwiKSAjJT4lIA0KICAjIGZpbHRlcihBZ2UgPT0gIjE1IHllYXJzIG9yIG92ZXIiKQ0KDQpwaHMNCg0KYGBgDQpJdCB0dXJucyBvdXQgdGhhdCBEZW5tYXJrJ3MgZGF0YSBjb21lcyBvbmx5IGZyb20gYSBwb3B1bGF0aW9uIG9mIDE1LTI0IHllYXIgb2xkLCB3aGljaCB3b3VsZCBiZSBleGNsdWRlZCB3aGVuIEkgZmlsdGVyIGZvciBhZ2UgcmFuZ2UgY29sbGVjdGl2ZS9Ub3RhbC4NCmBgYHtyfQ0KDQpwZXJjZWl2ZWRfaGVhbHRoX3N0YXR1c19vbmNlICU+JSANCiAgZmlsdGVyKFRJTUVfUEVSSU9EID09IGMoMjAwNzoyMDIzKSkgJT4lIA0KICBmaWx0ZXIoU2V4ID09ICJUb3RhbCIpICU+JSANCiAgZmlsdGVyKGBSZWZlcmVuY2UgYXJlYWAgPT0gIkRlbm1hcmsiKQ0KDQpgYGANCg0KDQpJbnNwZWN0IHRoZSBkYXRhc2V0IGZvciB0aGUgbnVtYmVyIG9mIHllYXJzIGl0IGNvdmVycy4NCg0KYGBge3J9DQoNCmJhcnBsb3QodGFibGUocGVyY2VpdmVkX2hlYWx0aF9zdGF0dXMkVElNRV9QRVJJT0QpLCBtYWluID0gIm51bWJlciBvZiBvYnNlcnZhdGlvbnMgb2YgeWVhciBpbiB0aGUgZGF0YSIpDQoNCg0KYGBgDQpJbnNwZWN0aW5nIHRoZSBkYXRhIGZvciBiYWxhbmNlIGluIHRoZSBoZWFsdGggc3RhdHVzIGNvbHVtbi4NCg0KYGBge3J9DQoNCnBocyAlPiUgDQogIGdyb3VwX2J5KEhFQUxUSF9TVEFUVVMpICU+JSANCiAgc3VtbWFyaXplKG49bigpKQ0KDQpgYGANCg0KV2hpY2ggY291bnRyaWVzIGFyZSBtb3N0IGhlYXZpbHkgcmVwcmVzZW50ZWQgaW4gdGhlIGRhdGE/IERlbm1hcmsgd2FzIHNlbGVjdGVkIHRvIGJlIGluY2x1ZGVkIEkgd2lsbCBkb3dubG9hZCBhIERlbm1hcmsgb25seSBkYXRhIGFuZCBpbnZlc3RpZ2F0ZSB3aHkgaXQgaXMgbm8gbG9uZ2VyIGxvY2F0ZWQgaW4gdGhpcyBkYXRhLiANCg0KYGBge3J9DQoNCnBocyAlPiUgDQogIGdyb3VwX2J5KGBSZWZlcmVuY2UgYXJlYWApICU+JQ0KICBzdW1tYXJpemUobj1uKCkpDQoNCmBgYA0KDQpgYGB7cn0NCnBocyAlPiUgDQogIGdyb3VwX2J5KEFHRSkgJT4lDQogIHN1bW1hcml6ZShuPW4oKSkNCg0KYGBgDQoNCg0KDQoNCg0KYGBge3J9DQoNCiMgZWR1Y2F0aW9uX2xldmVsIDwtIHJlYWRfY3N2KCIuLi9kYXRhL2VkdWNhdGlvbmFsX2F0dGFpbm1lbnRfZGlzdHJpYnV0aW9uX2FnZV9nZW5kZXIuY3N2IikNCiMgZWR1Y2F0aW9uX2xldmVsc19kZWZpbmVkIDwtIHJlYWRfY3N2KCIuLi9kYXRhL2VkdWNhdGlvbmFsX2F0dGFpbm1lbnRfZGlzdHJpYnV0aW9uLmNzdiIpDQplZHVjYXRpb25fbGV2ZWxzX3RocmVlIDwtIHJlYWRfY3N2KCIuLi9kYXRh
L2VkdWNhdGlvbmFsX2F0dGFpbm1lbnQuY3N2IikNCg0KYGBgDQoNCmBgYHtyfQ0KDQojIElTQ0VEMTFBXzVUOCA9IFRlcnRpYXJ5IGVkdWNhdGlvbg0KIyBJU0NFRDExQV8zXzQgPSBVcHBlciBzZWNvbmRhcnkgb3IgcG9zdC1zZWNvbmRhcnkgbm9uLXRlcnRpYXJ5IGVkdWNhdGlvbg0KIyBJU0NFRDExQV8wVDIgPSBCZWxvdyB1cHBlciBzZWNvbmRhcnkgZWR1Y2F0aW9uDQplZHVjYXRpb25fbGV2ZWxzX3RocmVlDQoNCmBgYA0KVmVyaWZ5IHRoYXQgdGhlcmUgYXJlIHNpbXBseSB0aHJlZSBjYXRlZ29yaWVzIGZvciB0aGUgZWR1Y2F0aW9uIGxldmVsIGF0dGFpbmVkIGFuZCAgRWR1Y2F0aW9uIGF0dGFpbm1lbnQgbGV2ZWwgY29sdW1ucw0KYGBge3J9DQoNCnNvcnQodW5pcXVlKGVkdWNhdGlvbl9sZXZlbHNfdGhyZWUkQVRUQUlOTUVOVF9MRVYpKQ0Kc29ydCh1bmlxdWUoZWR1Y2F0aW9uX2xldmVsc190aHJlZSRgRWR1Y2F0aW9uYWwgYXR0YWlubWVudCBsZXZlbGApKQ0KDQpgYGANCg0KYGBge3J9DQpzb3J0KHVuaXF1ZShlZHVjYXRpb25fbGV2ZWxzX3RocmVlJFNUQVRJU1RJQ0FMX09QRVJBVElPTikpDQpgYGANCkkgd2FudCB0byB1c2UgdGhlIG9ic2VydmVkIHZhbHVlcyBhbmQgbm90IHRoZSBzdGFuZGFyZCBlcnJvciB2YWx1ZXMgYXQgdGhpcyB0aW1lLiANCmBgYHtyfQ0KDQojIHBoc19vYnMgPC0gcGhzICU+JSANCiAgIyBmaWx0ZXIoKQ0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KZWxfdGhpcmQgPC0gZWR1Y2F0aW9uX2xldmVsc190aHJlZSAlPiUgDQogIGZpbHRlcihTZXggPT0gIlRvdGFsIikgJT4lIA0KICBmaWx0ZXIoQWdlID09ICJGcm9tIDI1IHRvIDY0IHllYXJzIikgJT4lIA0KICBmaWx0ZXIoVElNRV9QRVJJT0QgPT0gMjAxMCkgJT4lIA0KICBmaWx0ZXIoT0JTX1NUQVRVUyA9PSAiQSIpICU+JSANCiAgZmlsdGVyKFJFRl9BUkVBID09ICJBVVQiKSAlPiUgDQogIGZpbHRlcihTVEFUSVNUSUNBTF9PUEVSQVRJT04gPT0gIk9CUyIpDQoNCmVsX3RoaXJkDQoNCmBgYA0KDQoNCmBgYHtyfQ0KDQplbF9zZWNvbmRhcnkgPC0gZWR1Y2F0aW9uX2xldmVsc19kZWZpbmVkICU+JSANCiAgZmlsdGVyKFNleCA9PSAiVG90YWwiKSAlPiUgDQogIGZpbHRlcihBZ2UgPT0gIkZyb20gMjUgdG8gNjQgeWVhcnMiKSAlPiUgDQogIGZpbHRlcihUSU1FX1BFUklPRCA9PSAyMDEwKSAlPiUgDQogIGZpbHRlcihPQlNfU1RBVFVTID09ICJBIikgJT4lIA0KICBmaWx0ZXIoUkVGX0FSRUEgPT0gIkFVVCIpDQoNCmVsX3NlY29uZGFyeQ0KDQpgYGANCg0KYGBge3J9DQojIHNvcnQodW5pcXVlKGVsX29uY2UkT0JTX1ZBTFVFKSkNCndlbGxiZWluZ19zb2NpYWw8LSByZWFkX2NzdigiLi4vZGF0YS9jdXJyZW50X3dlbGxiZWluZ19leHAuY3N2IikNCg0KYGBgDQoNCmBgYHtyfQ0KDQojIHNvcnQodW5pcXVlKGVsX29uY2UkYEVkdWNhdGlvbmFsIGF0dGFpbm1lbnQgbGV2ZWxgKSkNCg0KDQoNCmBgYA0KDQpgYGB7cn0NCiMgc29ydCh1bmlxdWUoZWxfc2Vjb25k
YXJ5JGBFZHVjYXRpb25hbCBhdHRhaW5tZW50IGxldmVsYCkpDQpgYGANCg0KYGBge3J9DQoNCiMgZWxfb25jZSAgICU+JSAgDQogICMgZ3JvdXBfYnkoYEVkdWNhdGlvbmFsIGF0dGFpbm1lbnQgbGV2ZWxgKSAlPiUNCiAgIyBzdW1tYXJpemUobiA9IG4oKSkNCg0KYGBgDQoNCg0KYGBge3J9DQoNCiMgYmFycGxvdCh0YWJsZShlbF9vbmNlJGBFZHVjYXRpb25hbCBhdHRhaW5tZW50IGxldmVsYCksIG1haW4gPSAibnVtYmVyIG9mIG9ic2VydmF0aW9ucyBvZiB0aGF0IGVkdWNhdGlvbiBsZXZlbCBpbiB0aGUgZGF0YSIpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KDQpzYWZldHlfcmVnaW9ucyA8LSByZWFkX2NzdigiLi4vZGF0YS9zYWZldHlfcmVnaW9ucy5jc3YiKQ0KDQoNCmBgYA0KYGBge3J9DQpzYWZldHlfcmVnaW9ucyAlPiUgDQogIGZpbHRlcihUSU1FX1BFUklPRCA9PSAyMDEwKSAlPiUgDQogIGZpbHRlcihSRUZfQVJFQSA9PSAiQVVUIikgIyU+JSANCiAgIyBmaWx0ZXIoU2V4ID09ICJUb3RhbCIpICU+JSANCiAgIyBmaWx0ZXIoQWdlID09ICIxNSB5ZWFycyBvciBvdmVyIikgIA0KDQoNCmBgYA0KDQpgYGB7cn0NCg0Kd2VsbGJlaW5nX3NvY2lhbCAlPiUgDQogIGZpbHRlcihUSU1FX1BFUklPRCA9PSAyMDIyKSAlPiUgDQogIGZpbHRlcihSRUZfQVJFQSA9PSAiQVVUIikgIyU+JSANCiAgIyBmaWx0ZXIoVW5pdCkNCmBgYA0KDQpgYGB7cn0NCg0KDQpgYGANCg0KYGBge3J9DQoNCmBgYA0KDQo=